/******************************************
This program reads in OUTPUTS.grads_appended,
and pulls all relevant earnings data
from the PHF, using arrays to
output only the relevant information for each pik:

quarterly earnings by state
quarterly earnings overall

*****************************************/


/*********************************************************************************
   First, I need to output all the earnings for each individual degree observation.
   The output of this step is a pik-degree-job level file, which can then
   be aggregated over jobs (keeping one column per quarter), and then made long.
**********************************************************************************/
/* View: for every PHF record of a pik, emit one row per degree record of
   that same pik, carrying the quarterly earnings in nat_e* (all states)
   and state_e* (only when the job's state equals the degree's ui_state).
   Relies on the interleave delivering a pik's grads_appended rows before
   its phf_interleave_b rows (grads_appended is listed first on the SET). */
data earnings_raw (keep=pik opeid degcip year_grad qtime_grad
                        deglevl_code ui_state nat_e: state_e:)
     / view=earnings_raw;

    set OUTPUTS.grads_appended  (in=from_grads)
        INPUTS.phf_interleave_b (in=from_phf);
    by pik;

    /* One-time setup: a hash holding the degree records of the current pik.
       The (obs=0) dataset tag loads no rows from the table. */
    if _n_ = 1 then do;
        declare hash h_grad(dataset:'OUTPUTS.grads_appended (obs=0)',ordered:'Y');
        hrc = h_grad.defineKey('pik','opeid','degcip','year_grad','qtime_grad','deglevl_code','ui_state');
        hrc = h_grad.defineData('pik','opeid','degcip','year_grad','qtime_grad','deglevl_code','ui_state');
        hrc = h_grad.defineDone();
        declare hiter i_grad('h_grad');
    end;

    /* e* are read from the PHF record; nat_e* / state_e* are created here. */
    array arr_e  (&startqtr.:&endqtr.) e&startqtr.-e&endqtr.;
    array nat_e  (&startqtr.:&endqtr.) nat_e&startqtr.-nat_e&endqtr.;
    array state_e(&startqtr.:&endqtr.) state_e&startqtr.-state_e&endqtr.;

    /* Degree record: remember it. The return code is captured so that a
       repeated key does not raise an error; the repeat is simply not added. */
    if from_grads then hrc = h_grad.add();

    /* Earnings record: pair it with every stored degree of this pik. */
    if from_phf then do;
        hrc = i_grad.first();
        do while (hrc = 0);
            do qtr_ix = &startqtr. to &endqtr.;
                nat_e(qtr_ix)   = arr_e(qtr_ix);
                /* the comparison evaluates to 1 or 0, so earnings from a
                   job in another state contribute 0 */
                state_e(qtr_ix) = arr_e(qtr_ix) * (ui_state = state);
            end;
            output;
            hrc = i_grad.next();
        end;
    end;

    /* Finished with this pik: empty the hash before the next one starts. */
    if last.pik then hrc = h_grad.clear();
run;
                    
/***********************************
    PROC SUMMARY TO AGGREGATE
    OVER ALL JOBS
*************************************/
                   
/* Collapse earnings_raw to one row per pik-degree-ui_state combination by
   summing every quarterly nat_e* / state_e* column over its rows.
   NWAY keeps only the full cross-classification of the CLASS variables. */
proc summary data=earnings_raw nway;
    class pik opeid degcip year_grad qtime_grad deglevl_code ui_state;
    var nat_e: state_e:;
    /* a bare SUM= writes the sum of each VAR variable under its own name */
    output out=earn_totals (drop=_TYPE_ _FREQ_) sum=;
run;
                    
/* Reshape the wide totals (one nat_e / state_e column per quarter) into a
   long file: one row per pik-degree-ui_state and quarter, starting at the
   graduation quarter and ending at &endqtr.
   year and quarter are derived from qtime by the %inv_qtime macro
   (defined outside this program). */
data OUTPUTS.earn_long (keep=pik opeid degcip year_grad qtime_grad 
                       deglevl_code national_earnings 
                     instate_earnings qtime year quarter ui_state ) ; 
    
    set earn_totals ;
    /* No BY statement is needed: its only use was a first.deglevl_code
       zero-initialisation that was overwritten before every OUTPUT. */
                    
    array nat_e(&startqtr.:&endqtr.) nat_e&startqtr.-nat_e&endqtr. ;
    array state_e(&startqtr.:&endqtr.) state_e&startqtr.-state_e&endqtr. ;
        
    /* The arrays start at &startqtr., so a graduation quarter earlier than
       that would be an out-of-range subscript and stop the step. Begin at
       the first quarter actually available instead. */
    first_qtr = max(qtime_grad, &startqtr.) ;

    /* If qtime_grad is later than &endqtr. the loop body never runs and the
       degree contributes no rows. */
    do ii = first_qtr to &endqtr. ;
        national_earnings = nat_e(ii) ; 
        instate_earnings = state_e(ii) ;
        qtime = ii ; 
        year=%inv_qtime(qtime,year,location=datastep) ;
        quarter=%inv_qtime(qtime,quarter,location=datastep) ;
        output ;
    end;
    
run;

/* Roll the quarterly long file up to calendar years: one row per
   pik-year-degree-ui_state with the summed earnings.
   qtime_grad is not a CLASS variable here, and the automatic _TYPE_ and
   _FREQ_ columns are left in the output data set. */
proc summary data=OUTPUTS.earn_long nway;
    class pik year opeid degcip year_grad deglevl_code ui_state;
    var national_earnings instate_earnings;
    /* a bare SUM= writes each sum under the analysis variable's own name */
    output out=annual sum=;
run;
    
/* Turn the wide self-employment file (one se_earn / self_emp column per
   year) into a long file with one row per pik and year, holding
   se_earnings and flag_se.
   The loop index ii is not in the DROP= list, so it stays in the output
   (it always equals year). */
data seearn (drop=se_earn&starty.-se_earn&endy. self_emp:)  ;
    set INPUTS.seearnings_wide ;
    by pik ; 
    
    array se_earn{&starty.:&endy.} se_earn&starty.-se_earn&endy.; 
    array s_emp{&starty.:&endy.} self_emp&starty.-self_emp&endy.;
        
    /* Walk exactly the years the arrays are declared over. The bounds were
       previously hard-coded as 2002 to 2016, which is an out-of-range
       subscript if &starty./&endy. describe a narrower window and silently
       skips years if they describe a wider one. */
    do ii = &starty. to &endy. ; 
        flag_se = s_emp{ii} ;
        se_earnings = se_earn{ii} ;
        year = ii ;
        output ;
    end;
run;                            
                           
/* Attach annual self-employment earnings to the annual earnings rows and
   derive total earnings plus log earnings. Only rows that exist in
   `annual` are kept; a pik-year with no match in seearn gets
   se_earnings = 0 and flag_se = 0. */
data OUTPUTS.allearn_long_annual ;
    merge annual (in=in_annual)
          seearn (in=in_seearn) ;
    by pik year ;

    /* rows found only in seearn are discarded */
    if in_annual ;

    /* no self-employment record (or a missing value) counts as zero */
    se_earnings = coalesce(se_earnings, 0) ;
    flag_se     = coalesce(flag_se, 0) ;

    /* NOTE(review): with "+", a missing national_earnings makes
       total_earnings missing even when se_earnings is positive - confirm
       this is intended. */
    total_earnings = national_earnings + se_earnings ;

    /* logs are defined only for strictly positive amounts; otherwise the
       log variable stays missing */
    if national_earnings > 0 then logearn_national = log(national_earnings) ;
    if instate_earnings  > 0 then logearn_instate  = log(instate_earnings) ;
    if total_earnings    > 0 then logearn_total    = log(total_earnings) ;
run;
                            
/* Write the final annual file to &outstata as allearnings_ba_regs.dta,
   replacing any existing file of that name. No DBMS= is given, so the
   output format is presumably taken from the .dta extension. */
proc export
    data    = OUTPUTS.allearn_long_annual
    outfile = "&outstata/allearnings_ba_regs.dta"
    replace ;
run;
                   
        
